* Logging: move to the project root held in the global ohie, close any log
* that is still open, and start a fresh log whose file name carries the
* current date as YYMMDD (kept in the global sysdate).
cd $ohie
capture log close
global sysdate : display %tdYYNNDD date("`c(current_date)'", "DMY")
quietly log using "./logs/preER_adv_seln_$sysdate.log", replace

/*----------------------------------------------------------------------*/
/* PROGRAM: preER_adv_seln.do						*/
/*									*/
/* PURPOSE:								*/
/* [*]	This code calculates previous ER utilization for the three	*/
/*	compliance types (always takers, compliers, never takers).	*/
/*	Previous ER utilization is discretized into four indicators	*/
/*	(Y_num_pre >= 1, 2, 3, 4).					*/
/*									*/					
/* OUTPUT:								*/
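/* [*]	preER_adv_seln.xls: group means and best-fit lines per bin	*/
/*	(written at cell A1) and the upper CI bounds of the linearity	*/
/*	test (written at cell A5).					*/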
/*									*/
/*----------------------------------------------------------------------*/

* Set up the main Stata settings: empty workspace (data and Mata), room
* for large matrices, and double precision for newly created variables.
clear
clear mata
set matsize 3000
set type double

*********************************************
* RUN CODE
*********************************************

* Load the analysis sample. The path is quoted so that a directory name
* containing spaces in the global final does not break the command.
use "$final/oregonnumhh1.dta", clear

* Drop observations with a missing Y_num_pre BEFORE building the bins.
* Stata orders every missing value (., .a, ..., .z) above all numbers, so
* a comparison such as Y_num_pre>=1 is true for missings. missing() also
* catches extended missing values, which a test against . alone would
* have kept and coded as 1 in every bin.
drop if missing(Y_num_pre)

* Bins of preER: indicators for Y_num_pre of at least 1, 2, 3 and 4.
forvalues k = 1/4 {
	gen preER`k' = (Y_num_pre >= `k')
}

* First-stage quantities, all taken as unweighted sample means and kept
* in local macros for the calculations that follow.

* pZ: sample mean of Z
quietly summarize Z
local pZ = `r(mean)'

* pC = P(D=1|Z=0): mean of any_medicaid where Z is 0
quietly summarize any_medicaid if Z == 0
local pC = `r(mean)'

* pI = P(D=1|Z=1): mean of any_medicaid where Z is 1
quietly summarize any_medicaid if Z == 1
local pI = `r(mean)'

* OUT holds, for each bin i = 1..4, five consecutive entries
* (AT mean, C mean, NT mean, slope, intercept) in columns 5*(i-1)+1 to
* 5*(i-1)+5. Columns 21 and 22 are filled in after the loop.
matrix define OUT = J(1, 22, 0)

forvalues i = 1/4 {
	* Column offset of bin i inside OUT. The loop index itself is used,
	* replacing a separate counter that always equalled i.
	local base = 5*(`i'-1)

	* AT: unweighted mean of the bin where Z==0 and any_medicaid==1.
	* The variable name is spelled out in full so the command does not
	* depend on variable abbreviation being enabled and unambiguous.
	quietly summarize preER`i' if Z==0 & any_medicaid==1
	local AT`i' = `r(mean)'

	* NT: unweighted mean of the bin where Z==1 and any_medicaid==0.
	quietly summarize preER`i' if Z==1 & any_medicaid==0
	local NT`i' = `r(mean)'

	* Weighted cell means Ecov<i><d><z>, one per (any_medicaid, Z) cell.
	* NOTE(review): these use the weight w while AT, NT, pC, pI and pZ
	* are unweighted - confirm that the mix is intended.
	forvalues d = 0/1 {
		forvalues z = 0/1 {
			quietly summarize preER`i' [w=w] if any_medicaid==`d' & Z==`z'
			local Ecov`i'`d'`z' = `r(mean)'
		}
	}

	* C: pZ times the mean backed out from the any_medicaid==1 cells plus
	* (1-pZ) times the mean backed out from the any_medicaid==0 cells,
	* each scaled by 1/(pI-pC).
	local C`i' = `pZ' * (`pI'/(`pI'-`pC') * `Ecov`i'11' - `pC'/(`pI'-`pC') * `Ecov`i'10') ///
		+ (1-`pZ') * ((1-`pC')/(`pI'-`pC') * `Ecov`i'00' - (1-`pI')/(`pI'-`pC') * `Ecov`i'01')

	matrix OUT[1, `=`base'+1'] = `AT`i''
	matrix OUT[1, `=`base'+2'] = `C`i''
	matrix OUT[1, `=`base'+3'] = `NT`i''

	* Line of best fit for this bin: regress the three group means on
	* p = pC/2, (pC+pI)/2 and (pI+1)/2 in a scratch three-row dataset.
	* preserve and restore keep the analysis data intact.
	preserve
	clear
	set obs 3
	gen p = .
	replace p = `pC'/2 in 1
	replace p = (`pC'+`pI')/2 in 2
	replace p = (`pI'+1)/2 in 3
	gen preER`i' = .
	replace preER`i' = `AT`i'' in 1
	replace preER`i' = `C`i'' in 2
	replace preER`i' = `NT`i'' in 3
	reg preER`i' p
	matrix OUT[1, `=`base'+4'] = _b[p]
	matrix OUT[1, `=`base'+5'] = _b[_cons]
	restore
}


* Columns 21 and 22 of OUT carry pC and pI.
matrix OUT[1, 21] = `pC'
matrix OUT[1, 22] = `pI'

* Column labels, five per bin. The macro is cleared first so that a
* value left over from an earlier run in the same session cannot be
* prepended and misalign the labels.
local cols
forvalues i = 1/4 {
	local cols `cols' ATpreER`i' CpreER`i' NTpreER`i' mpreER`i' conspreER`i'
}

* NOTE(review): column 21 holds pC but is labelled pB. The label is left
* unchanged because files that read this output may rely on it - confirm.
matrix colnames OUT = `cols' pB pI

matrix list OUT

* The path is quoted so that spaces in the global output do not break it.
putexcel set "$output/preER_adv_seln.xls", replace
putexcel A1 = matrix(OUT), colnames

*************************************************

* Linearity test. For each bin the statistic TS is XC minus the
* combination (1-pC)/(1-pC+pI) * XAT + pI/(1-pC+pI) * XNT, with every
* piece written in terms of sureg coefficients. TS is bootstrapped and
* the upper bound of its 90% percentile interval is stored in OUT2.

* Interaction of any_medicaid and Z, used as a regressor in eq1
gen DZ = any_medicaid * Z

* One slot per bin for the upper bound
matrix define OUT2 = J(1, 4, 0)

* The macros below hold expression text, not numbers: each one is
* evaluated against whichever estimation results are current when it is
* expanded. None of them depends on the bin, so they are defined once
* ahead of the loop. Note that pI and pC now name expressions and no
* longer the numeric values used earlier in the file.

* Coefficient labels
local gam _b[eq1:_cons]
local gamD _b[eq1:any_medicaid]
local gamZ _b[eq1:Z]
local gamDZ _b[eq1:DZ]
local lam _b[eq2:_cons]
local lamZ _b[eq2:Z]
local mu _b[eq3:_cons]

* Components of the test statistic
local pC (`lam')
local pI (`lam'+`lamZ')
local XAT (`gam'+`gamD')
local XNT (`gam'+`gamZ')
local XC (`mu'*(`pI'*(`gam'+`gamD'+`gamZ'+`gamDZ')/(`pI'-`pC')-`pC'*(`gam'+`gamD')/(`pI'-`pC'))+(1-`mu')*((1-`pC')*`gam'/(`pI'-`pC')-(1-`pI')*(`gam'+`gamZ')/(`pI'-`pC')))
local TS (`XC'-(1-`pC')*`XAT'/(1-`pC'+`pI')-`pI'*`XNT'/(1-`pC'+`pI'))

* Loop through the buckets of pre_ER
forvalues i = 1/4 {

	* The three-equation system for this bin
	local sysreg sureg (eq1: preER`i' any_medicaid Z DZ) (eq2: any_medicaid Z) (eq3: Z)

	* Bootstrap the test statistic
	bootstrap ts_`i'=`TS', reps(1000) seed(6574360) level(90): `sysreg'

	* Row 2 of the percentile interval matrix is the upper bound
	matrix define ci_mat = e(ci_percentile)
	local upperBound = ci_mat[2,1]
	matrix OUT2[1,`i'] = `upperBound'
	local cols2 `cols2' preER`i'_lin_ci_U

	* Re-estimate on the full sample and print each component as a check
	quietly `sysreg'
	display `pC'
	display `pI'
	display `XAT'
	display `XNT'
	display `XC'
	display `TS'
}

* Label, show, and write the bounds below the first table of the workbook
matrix colnames OUT2 = `cols2'
matrix list OUT2

putexcel A5 = matrix(OUT2), colnames

capture log close
